import os
import time
import requests
from lxml import etree
from datetime import datetime
import copy
# URL of the Weibo real-time hot-search page to scrape.
url = "https://s.weibo.com/top/summary?cate=realtimehot"
# Request headers: a browser-like User-Agent plus a logged-in session cookie.
# NOTE(review): the cookie below is a hard-coded session credential. It will
# expire and should not live in source control -- consider loading it from an
# environment variable or a config file instead.
headers = {
    'User-Agent': "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    # The value must hold only the cookie pairs. A leading "Cookie: " (the
    # header name pasted in by mistake) would become part of the first
    # cookie's name.
    'Cookie': 'SSOLoginState=1638934595; SUB=_2A25MtFgTDeRhGeNG41sS8y_LyT6IHXVsV3hbrDV8PUJbkNAKLULAkW1NSxC6-wKHfZHj0gWpTFAvYImlTNIJ-4jF; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWxYW50fsUFkjhSQyRj6sI85NHD95Qf1hn4e0epS0zEWs4DqcjsBPS4dJSq; _s_tentry=link.csdn.net; Apache=6836598618705.596.1638945186753; UOR=link.csdn.net,s.weibo.com,link.csdn.net; SINAGLOBAL=6836598618705.596.1638945186753; ULV=1638945186761:1:1:1:6836598618705.596.1638945186753:'
}

# Without a timeout, requests waits indefinitely on a stalled connection.
r = requests.get(url, headers=headers, timeout=10)
print(r.status_code)
html_xpath = etree.HTML(r.text)
# Second cell of every row in the table under the "pl_top_realtimehot" element.
# NOTE(review): this only matches if the served markup contains an explicit
# <tbody>; the parser is not expected to insert one -- confirm against the
# live page.
data = html_xpath.xpath('//*[@id="pl_top_realtimehot"]/table/tbody/tr/td[2]')
# Row counter for the write loop; -1 means no row has been processed yet.
num = -1
# print(r.text)  # uncomment when debugging the raw response

# Build the date-based names used for the output file.
# Take a single snapshot of the clock so that every piece below describes the
# same instant; calling time.localtime() separately for each piece could yield
# mismatched parts when the script runs across a midnight or hour boundary.
_now = time.localtime()
# The CJK unit characters are injected with str.format() after strftime(),
# so strftime() itself only ever receives an ASCII format string.
year_path = time.strftime('%Y{y}', _now).format(y='年')
month_path = time.strftime('%m{m}', _now).format(m='月')
day_month = time.strftime('%d{d}', _now).format(d='日')
# Full date (year + month + day) and the same date with the hour appended.
time_path = year_path + month_path + day_month
time_name = time_path + time.strftime('%H{h}', _now).format(h='点')
# Create the output directory; exist_ok avoids the check-then-create race.
all_path = "./数据"
os.makedirs(all_path, exist_ok=True)
# Final location of the Markdown file: one file per day.
path = all_path + '/' + time_path + '.md'
# print(path)
# Append the heading for this run; the `with` block closes the file itself,
# so no explicit close() is needed.
with open(path, 'a', encoding='utf-8') as f:
    f.write('{} {}\n\n'.format('# ', time_name+'数据'))
# Write one Markdown entry per scraped table row.
# The output file is opened once for the whole loop instead of once per row,
# and the `with` block closes it, so no explicit close() is needed.
with open(path, 'a', encoding='utf-8') as f:
    for num, tr in enumerate(data):
        # Row 0 is deliberately left out of the output.
        if num == 0:
            continue

        title = tr.xpath('./a/text()')
        hot_score = tr.xpath('./span/text()')
        # Relative XPath: take the link of THIS cell. An absolute '//td/a/@href'
        # would select every link in the document, so element [0] would be the
        # same first link for every entry.
        link = tr.xpath('./a/@href')

        # Skip rows that lack a title or a link rather than crashing with
        # IndexError on the [0] lookups below.
        if not title or not link:
            continue
        # A row without a heat value is still written, with the value left blank.
        heat = hot_score[0] if hot_score else ''

        print(num)
        f.write('{} {}、{}\n\n'.format('###', num, title[0]))
        f.write('{} {}\n\n'.format('微博当时热度为：', heat))
        f.write('{} {}\n\n'.format(
            '点击访问：', '[链接](https://s.weibo.com' + link[0] + ')'))
